# ------------------------------------------------------------------------------
# Replication Materials
# 
# title: Eliciting Beliefs as Distributions in Online Surveys
# journal: Political Analysis
# authors: Lucas Leemann, Richard Traunmüller, and Lukas Stoetzer
# date: August 2020
# ------------------------------------------------------------------------------



# Read the raw survey export: comma-separated, first line holds column names.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
data <- read.csv("in/PA_add_on_survey_Trump_May2020_May+4,+2020_11.32.csv",
                 sep = ",", header = TRUE)

dim(data)
# Discard the first three rows of the export.
# NOTE(review): presumably these are non-respondent rows (extra header/metadata
# lines or a test response written by the survey platform) -- confirm against
# the raw file. drop = FALSE guarantees the result stays a data frame.
data <- data[-(1:3), , drop = FALSE]


### Median Question
# For every item: print the raw answer frequencies, then mark blank answers
# (empty strings) as missing. The original script carried a disabled
# droplevels() call after each recode; it is left out here.
table(data$Q80)
is.na(data$Q80) <- data$Q80 == ""

table(data$Q95)
is.na(data$Q95) <- data$Q95 == ""

table(data$Q97)
is.na(data$Q97) <- data$Q97 == ""

table(data$Q139)
is.na(data$Q139) <- data$Q139 == ""


### Quantile Question Wide
# Same pattern for each item: tabulate the raw answers, then turn empty
# strings into NA. (The disabled droplevels() calls of the original are
# omitted.)
table(data$Q115)
is.na(data$Q115) <- data$Q115 == ""

table(data$Q116_1)
is.na(data$Q116_1) <- data$Q116_1 == ""

table(data$Q117_1)
is.na(data$Q117_1) <- data$Q117_1 == ""


### Quantile Question Narrow
# Tabulate each item's raw answers and recode empty strings as NA.
# (The disabled droplevels() calls of the original are omitted.)
table(data$Q132)
is.na(data$Q132) <- data$Q132 == ""

table(data$Q290_4)
is.na(data$Q290_4) <- data$Q290_4 == ""

table(data$Q291_1)
is.na(data$Q291_1) <- data$Q291_1 == ""


### Manski Question
# Tabulate each item's raw answers and recode empty strings as NA.
# (The disabled droplevels() calls of the original are omitted.)
table(data$Q121)
is.na(data$Q121) <- data$Q121 == ""

table(data$Q122)
is.na(data$Q122) <- data$Q122 == ""

table(data$Q123)
is.na(data$Q123) <- data$Q123 == ""

table(data$Q125_1)
is.na(data$Q125_1) <- data$Q125_1 == ""

table(data$Q126_1)
is.na(data$Q126_1) <- data$Q126_1 == ""

### Visual Bin
# Tabulate the raw (comma-separated) bin answer, then recode blanks as NA.
# (The disabled droplevels() call of the original is omitted.)
table(data$q1)
is.na(data$q1) <- data$q1 == ""

# Expand the comma-separated answer in `q1` into one numeric column per bin.
#
# An answer is expanded only when it splits into exactly 12 comma-separated
# entries; entry k goes to the k-th column listed below. Every other row
# (missing answer or a different number of entries) keeps NA in all 12 columns.
# The original code carried a commented-out assignment to a `q1_2024` column;
# as before, the first entry is stored in `q1_2529`.
# NOTE(review): the column suffixes presumably encode the bin ranges
# (e.g. 25-29) -- confirm against the survey instrument.
#
# The split is done once for the whole column instead of twice per row, and no
# `1:n` index loop over rows is used, so the code is also safe when `data` has
# zero rows. The bin columns are always numeric, even if no row qualifies.
bin_cols <- c("q1_2529", "q1_3034", "q1_3539", "q1_4044", "q1_4549", "q1_5054",
              "q1_5559", "q1_6064", "q1_6569", "q1_7074", "q1_7579", "q1_8084")

bin_parts <- strsplit(as.character(data$q1), ",")
bin_ok <- lengths(bin_parts) == length(bin_cols)

# One row per qualifying respondent, one column per bin (row order preserved).
bin_vals <- matrix(as.numeric(unlist(bin_parts[bin_ok])),
                   ncol = length(bin_cols), byrow = TRUE)

for (j in seq_along(bin_cols)) {
  bin_col <- rep(NA_real_, nrow(data))
  bin_col[bin_ok] <- bin_vals[, j]
  data[[bin_cols[j]]] <- bin_col
}

# Remove the temporaries so they do not leak into the rest of the script.
rm(bin_cols, bin_parts, bin_ok, bin_vals, bin_col, j)

###

# Collapse party identification (Q153) into three groups: the three Democrat
# labels become "Democrat", the three Republican labels become "Republican",
# and every other value becomes "Independent".
# NOTE(review): because %in% never yields NA, respondents with a missing Q153
# also end up as "Independent" -- confirm this is intended.
dem_labels <- c("Leaning Democrat", "Weak Democrat", "Strong Democrat")
rep_labels <- c("Leaning Republican", "Weak Republican", "Strong Republican")

data$PID <- ifelse(
  data$Q153 %in% dem_labels,
  "Democrat",
  ifelse(data$Q153 %in% rep_labels, "Republican", "Independent")
)
rm(dem_labels, rep_labels)

# Cross-check the collapsed variable against the original item.
table(data$PID, data$Q153)

### Screener Questions

# Raw answers to the screener item.
table(data$Q127)

# screen1 is 1 when Q127 equals "University of Zurich", 0 for any other
# answer, and NA when Q127 is missing.
# (The original's disabled droplevels() call on Q127 is omitted.)
passed_screener <- data$Q127 == "University of Zurich"
data$screen1 <- ifelse(passed_screener, 1, 0)
rm(passed_screener)

# Share of respondents per screener outcome.
prop.table(table(data$screen1))


### Timing Variable

# Raw duration values as exported.
table(data$Duration..in.seconds.)

# Convert the duration to a number. Going through character first means a
# factor column yields its labels rather than its internal integer codes.
# (The original's disabled droplevels() call is omitted.)
duration_chr <- as.character(data$Duration..in.seconds.)
data$time <- as.numeric(duration_chr)
rm(duration_chr)

# Derive the treatment arm from which lead item a respondent answered
# (non-missing after the blank-to-NA recoding earlier in the script).
# The assignments run in a fixed order, so if several lead items are
# non-missing for one respondent the last matching line wins.
# `!is.na(x)` replaces `is.na(x) == F`, which depended on the reassignable `F`.
data$treatment <- NA
data$treatment[!is.na(data$Q80)] <- "Median"
data$treatment[!is.na(data$Q115)] <- "Q.Wide"
data$treatment[!is.na(data$Q132)] <- "Q.Narrow"
data$treatment[!is.na(data$Q121)] <- "Manski"
data$treatment[!is.na(data$q1)] <- "Bins"

# Fix the level order; the first level ("Manski") is the reference category
# in the regression on treatment.
data$treatment <- factor(
  data$treatment,
  levels = c("Manski", "Median", "Q.Wide", "Q.Narrow", "Bins")
)

# Tag the experiment and keep a slim copy with only the timing variables.
data$experiment <- "Trump"
data.trump <- data[, c("time", "treatment", "experiment")]

# Number of respondents per treatment arm.
table(data$treatment)

# Mean completion time per treatment arm (missing times ignored). The result
# has the columns `treatment` and `x`.
# `na.rm = TRUE` is spelled out because `T` is a reassignable variable.
time.means <- aggregate(data$time, by = list(treatment = data$treatment),
                        FUN = mean, na.rm = TRUE)

# Linear model of completion time on treatment arm, with coefficient summary
# and ANOVA table.
m <- lm(time ~ treatment, data = data)
summary(m)
anova(m)

# Median and standard deviation of completion time per treatment arm.
time.median <- aggregate(data$time, by = list(treatment = data$treatment),
                         FUN = median, na.rm = TRUE)
time.sd <- aggregate(data$time, by = list(treatment = data$treatment),
                     FUN = sd, na.rm = TRUE)
